* This file will replicate the analysis for the Pennsylvania Experiment
* Save all files starting with "pa_" into a folder and set that folder 
* as the working directory to successfully execute the file.

* Convert each supplementary CSV into a Stata dataset sorted on subject, so it
* can be merged into the master list later in this file.
* Inputs and outputs, in the order they are processed:
*   pa_prim_reg_wid.csv        -> regvoterstomerge       (matched as registered to vote before the 3/28 deadline)
*   pa_prim_voters_wid.csv     -> primvoterstomerge      (matched as primary voters)
*   pa_prim_nevervoted_wid.csv -> primnevervotedtomerge  (had never previously voted)
*   pa_all_gender.csv          -> gendertomerge          (gender and vote propensity)
*   pa_all_race.csv            -> racetomerge            (race and age)
*   pa_all_partisanship.csv    -> partisanshiptomerge    (partisanship score)
local csvfiles pa_prim_reg_wid.csv pa_prim_voters_wid.csv pa_prim_nevervoted_wid.csv pa_all_gender.csv pa_all_race.csv pa_all_partisanship.csv
local dtafiles regvoterstomerge primvoterstomerge primnevervotedtomerge gendertomerge racetomerge partisanshiptomerge

* The two lists are positional: the i-th CSV is saved under the i-th dataset name.
local nfiles : word count `csvfiles'
forvalues i = 1/`nfiles' {
    local src : word `i' of `csvfiles'
    local dst : word `i' of `dtafiles'
    import delimited "`src'", clear
    sort subject
    save `dst', replace
}


* Load the master list of contacted individuals, then attach the supplementary
* datasets (described in the original comments as Catalist data) one at a time,
* matching one-to-one on subject.
* pa_canvassers is not created in this file; it is expected to already exist
* as a Stata dataset in the working directory.
import delimited "pa_pledge_id.csv", clear
sort subject

* nogenerate suppresses the _merge variable, so there is nothing to drop
* before the next merge in the sequence.
foreach extra in gendertomerge pa_canvassers racetomerge partisanshiptomerge {
    merge 1:1 subject using `extra', nogenerate
    sort subject
}

* Collapse the numbered vote-propensity columns into a single variable.
* Columns are applied in ascending order, so a non-missing value in a
* higher-numbered column overrides the value taken from a lower-numbered one.
gen votepropensity = voteprop1
forvalues k = 2/3 {
    replace votepropensity = voteprop`k' if voteprop`k' != .
}

* Set female to 0 for every subject flagged as male.
replace female = 0 if male == 1

* Same collapsing rule for the five numbered age columns.
gen age = age1
forvalues k = 2/5 {
    replace age = age`k' if age`k' != .
}

* Single race code: 1 = caucasian, 2 = black, 3 = hispanic, 4 = asian.
* When several flags equal 1 the highest code wins, which is why the checks
* run from asian down to caucasian. Subjects with no flag set stay missing.
gen race = cond(asian == 1, 4, cond(hispanic == 1, 3, cond(black == 1, 2, cond(caucasian == 1, 1, .))))

* Flag subjects canvassed on or before the registration deadline.
* td(28mar2016) is the Stata daily-date value 20541 (days since 01jan1960).
* Subjects canvassed later, or with a missing canvass date, are left missing.
gen contactbeforecutoff = 1 if DateCanvassed <= td(28mar2016)

* Attach the dataset of individuals who were registered to vote before the
* 3/28 deadline. Per the original comments, this dataset also carries
* indicators for the party the subject is registered with.
merge 1:1 subject using regvoterstomerge

* A subject counts as registered before the deadline when it was matched in
* both datasets (_merge == 3); every other row is left missing.
gen regbeforedeadline = cond(_merge == 3, 1, .)
drop _merge

* Attach the list of subject ids recorded as having voted in the primary.
sort subject
merge 1:1 subject using primvoterstomerge

* Primary-vote indicator: matched rows (_merge == 3) become 1, rows found only
* in the master data (_merge == 1) become 0.
* NOTE(review): rows found only in the using file (_merge == 2) are not
* recoded and would keep the value 2 - confirm that no such rows exist.
recode _merge (3 = 1) (1 = 0), generate(primvote)
drop _merge

* Attach the list of subject ids for those who have not previously voted.
sort subject
merge 1:1 subject using primnevervotedtomerge

* nevervoted is 1 when the subject matched the never-voted list, or when the
* subject is not flagged as registered before the deadline; otherwise it is 0.
gen nevervoted = (_merge == 3) | (regbeforedeadline != 1)
drop _merge

* Remove subjects who signed both a pledge card and a petition; that is, keep
* only rows where at least one of the two answers is not "Yes".
keep if pledge_card != "Yes" | signed_petition != "Yes"

* Numeric version of pledge_card for use in the models below. encode assigns
* integer codes in alphabetical order of the string values and labels them
* with the original text; empty strings become missing.
* NOTE(review): with only "No"/"Yes" present this gives No = 1, Yes = 2 - confirm.
encode pledge_card, generate(pledged)

* Create group variable for clustering
* date holds the pledge-card contact date for subjects with pledge_card "Yes"
* and the petition contact date for subjects with pledge_card "No"; it stays
* missing for any other pledge_card value.
* NOTE(review): no estimation command visible in this file uses `group';
* every model below clusters on group2.
gen date= pledgecardcontactdate if pledge_card=="Yes"
replace date= petitioncontactdate if pledge_card=="No"
* concat joins the date and the pledge_card text into one string per subject;
* encode then maps each distinct string to an integer code.
egen datecond=concat(date pledge_card)
encode datecond, gen(group)

* Create group variable for clustering by canvasser/day/school
* One cluster per distinct combination of DateCanvassed and CanvassedBy.
* NOTE(review): school is not an explicit input to this concatenation -
* presumably it is implied by the canvasser; confirm.
egen datecond2=concat(DateCanvassed CanvassedBy)
encode datecond2, gen(group2)

**** TABLE 2 RESULTS ****

* Sample restriction shared by the subsetted analyses: subjects who could have
* been treated, i.e. contacted before the cutoff or registered before the deadline.
local eligible "(contactbeforecutoff==1 | regbeforedeadline==1)"

* NOTE(review): the [_prop_2]Yes / [_prop_2]No references in the test commands
* rely on how -proportion- names its results; that naming may differ across
* Stata releases - confirm against the version used for replication.

* Table 2, Line 1
* Difference of proportions for PA, standard errors clustered on group2, full sample
proportion primvote, vce(cluster group2) over(pledged)
test [_prop_2]Yes = [_prop_2]No

* Table 2, Line 2
* Same test, restricted to subjects who could have been treated
proportion primvote if `eligible', vce(cluster group2) over(pledged)
test [_prop_2]Yes = [_prop_2]No

* Table 2, Line 3
* Same test, restricted to previous nonvoters who could have been treated
proportion primvote if `eligible' & nevervoted==1, vce(cluster group2) over(pledged)
test [_prop_2]Yes = [_prop_2]No

* Reported in text only
* Interaction of pledging with never having voted, on the could-have-been-treated sample
regress primvote pledged##nevervoted if `eligible', vce(cluster group2)

**** RESULTS IN SUPPORTING INFORMATION ****

* Sample restriction used by every model in this section: subjects contacted
* before the cutoff or registered before the deadline.
local eligible "(contactbeforecutoff==1 | regbeforedeadline==1)"

* S1 Table Results, Column 1 Line 1
* CanvassedBy is absorbed as a fixed effect; standard errors clustered on group2
areg primvote pledged if `eligible', vce(cluster group2) absorb(CanvassedBy)
* S1 Table Results, Column 1 Line 2
* Same model, previous nonvoters only
areg primvote pledged if `eligible' & nevervoted==1, vce(cluster group2) absorb(CanvassedBy)

* S2 Table Results, Column 1
* Subjects below the age of 30; subjects whose age is missing are kept as well
regress primvote pledged if `eligible' & (age<30 | age==.), vce(cluster group2)


